---
title: "Replication code and tutorial for THE ART OF SELF-CRITICISM: HOW AUTOCRATS PROPAGATE THEIR OWN POLITICAL SCANDALS"
author: "Charles Chang, Duke Kunshan University"
output: html_document
---

## Set R working directory as current directory

```{r, setup, include=FALSE}
rm(list = ls())

knitr::opts_knit$set(root.dir = getwd())

```


## PostgreSQL and pgAdmin 4 installation

This replication uses PostgreSQL for data management. Before running the replication code, you will need to install PostgreSQL database.
PostgreSQL is a free and open-source relational database management system. To install it, simply download it from its [official website](https://www.enterprisedb.com/downloads/postgres-postgresql-downloads?ref=geekbits.io). I recommend the latest version (PostgreSQL 15.2 is used here). In addition, I recommend install pgAdmin4, which is a free, open-source PostgreSQL database administration GUI. In **Stack Builder**, which is used to built additional components of PostgreSQL, you will have the option to install pgAdmin4 by checking its box. Alternatively, it can be downloaded [here](https://www.pgadmin.org/download/pgadmin-4-windows/?ref=geekbits.io). In this tutorial, pgAdmin4 version 6.15 is used.  

Before using them, you will also need to set up Environment Variables for both software. There are multiple ways to set these variables, one of which is to use the pgAdmin4 GUI. In pgAdmin4, click **File -> Preference**. In the pop-up window, navigate the left panel to choose **Paths -> Binary paths**, and then configure the directory for the Binary Path of your Database Server. The binary path should be the installation directory of your PostgreSQL. For example, I installed in on my Windows 10 desktop and my setup is shown as the ![screenshot below](pgAdmin_binary_path_setup.png).

Once setup is complete, you can restore the PostgreSQL backup file to your Database. In pgAdmin4, you can create a new database in your PostgreSQL, for example, by naming it *replication*. Then right click *replication* and choose *Restore...*. A new *Restore* pop-up window will appear. In the *Filename* under *General*, click the folder and choose *pc_replication.backup* in the replication materials you have downloaded. Then click *Restore*. PostgreSQL will take several minutes to restore the file. Once complete, you will be able to see the data tables under *replication -> Schemas -> public -> Tables*.  

## Configure R and connect with the PostgreSQL database 


```{r}
## install.packages("devtools")
#devtools::install_github("RcppCore/Rcpp")
#devtools::install_github("rstats-db/DBI")
#devtools::install_github("rstats-db/RPostgres")

library(RPostgres)
library(DBI)


## Change the following setup parameters to yours

# password for your PostgreSQL database
pw<- {
  "postgres"
}

con <- dbConnect(RPostgres::Postgres()
     , host='localhost'  # name of your PostgreSQL host. You can find all the settings in your PostgreSQL/pgAdmin4
     , port='5432'       # your port
     , dbname='replication_20230914' # name of your database
     # , dbname='pc_replication' # name of your database
     , user='postgres' # user name
     , password=pw)


rm(pw) # removes the password

```


## List all the tables in the database
## Some of the tables listed here are created by the system.
```{r}
dbListTables(con)
```


## Query the database - Count news articles that were published between June 15, 2010 and June 15, 2015
```{r}
count_news_articles <- dbGetQuery(con, "SELECT COUNT(*) FROM sinanews WHERE time::timestamp BETWEEN '20100615' AND '20150615'")
count_news_articles
```

## Count 3,244,019 Weibo users with unique Sina IDs.

```{r}
select_unique_weibo_users <- "SELECT COUNT(DISTINCT(uid))
FROM geotagweibousers"

count_unique_weibo_user_id <- dbGetQuery(con, select_unique_weibo_users)
count_unique_weibo_user_id 

```


## Count the visible comments elicited by the news articles
```{r}

select_comments_on_news <- "SELECT COUNT(DISTINCT(cmt.mid)) 
FROM sinanewscomments AS cmt, sinanews as news
WHERE cmt.newsid = news.newsid
AND news.time::timestamp BETWEEN '20100615' AND '20150615'"

count_comments_on_news <- dbGetQuery(con, select_comments_on_news)
count_comments_on_news  

```

## Count the comments made by the unique Weibo users
```{r}

select_individual_user_comments <- "SELECT COUNT(DISTINCT(cmt.mid)) 
FROM sinanewscomments AS cmt, sinanews as news, geotagweibousers as usr
WHERE cmt.newsid = news.newsid
AND news.time::timestamp BETWEEN '20100615' AND '20150615'
AND cmt.uid = usr.uid"

count_individual_user_comments <- dbGetQuery(con, select_individual_user_comments)
count_individual_user_comments

```

## Show all the corrupt officials used in the study -- Appendix Table. A.1
```{r}
select_corrupt_officials <- "SELECT * FROM corruptofficials"

count_corrupt_officials <- dbGetQuery(con, select_corrupt_officials)
count_corrupt_officials
```


## Update Individual User Comments in Sina News Comments -- Optional to run here
# ```{r}
# update_comments <- dbGetQuery(con, "UPDATE sinanewscomments
# SET authentic = 1
# FROM geotagweibousers
# WHERE sinanewscomments.uid = geotagweibousers.uid")
# 
# update_comments 
# ```



## Update the count of posts from paid commentators in Sina News  -- Optional to run here
## Note: the warning of dbFetch call can be ignored
# ```{r}
# 
# 
# update_paidCommentators <- dbGetQuery(con, "UPDATE sinanews
# SET paidcmt = foo.paidcmt
# FROM 
# (SELECT count(DISTINCT(cmt.uid)) AS paidcmt, cmt.newsid
# FROM sinanewscomments AS cmt, (SELECT DISTINCT(uid)
# FROM
# (
# SELECT DISTINCT(uid)
# FROM knownpaidcommentators1
# UNION
# (SELECT DISTINCT(uid)
# FROM knownpaidcommentators2)
# ) AS foo) AS fc
# WHERE cmt.uid = fc.uid
# GROUP BY cmt.newsid) AS foo
# WHERE sinanews.newsid = foo.newsid")
# 
# ```

## Update the count of posts from individual Weibo users in Sina News -- Optional to run here
# ```{r}
# 
# update_individualUserComments <- dbGetQuery(con, "UPDATE sinanews 
# SET geotagcmt = foo.autcmt
# FROM
# (SELECT sc.autoid, count(cmt.authentic) as autcmt
# FROM sinanews as sc, sinanewscomments as cmt
# WHERE sc.newsid = cmt.newsid
# AND cmt.authentic = 1
# GROUP BY sc.autoid) as foo
# WHERE sinanews.autoid = foo.autoid")
# 
# ```



####################################
###### Main Analysis ###############
####################################

## We can also include all the variables of sina news to R

```{r}
select_sql <- "SELECT create_date, time, newsid, leadernews, autoid, geotagcmt, cmt, paidcmt, bertp, bertac, governmentsource, governmentmedia, subsidizedmedia, commercialmedia, name_en 
FROM sinanews
WHERE time::timestamp > '20100615' AND time::timestamp < '20150615'"

out <- dbGetQuery(con, select_sql)

dim(out) # check the dimension
```


## prepare data

```{r}
library(dplyr)
library('fastDummies')

# binary variable -- whether there is any authentic users' comments
out$geotagcmtbi <- ifelse(out$geotagcmt > 0, 1, 0)

# convert autoid to character
out$autoid <- as.character(out$autoid)

# create a binary variable to mark the beginning of the anti-corruption campaign
out<- transform(out, campaign = ifelse(as.Date(create_date, "%Y-%m-%d") > as.Date("2012-12-15"), 1, 0))

# format data type

out$governmentsource <- as.numeric(out$governmentsource)

out$governmentmedia <- as.numeric(out$governmentmedia)

out$subsidizedmedia <- as.numeric(out$subsidizedmedia)

out$commercialmedia <- as.numeric(out$commercialmedia)

# create mediatype and government official source

out <- out %>%
  mutate(mediatype = case_when(governmentsource == 1 | governmentmedia ==  1 ~ 'government', 
                            subsidizedmedia == 1 ~ 'subsidizedmedia',
                            commercialmedia == 1 ~ 'commercialmedia')
                            )

out <- out %>% mutate(officialsource = ifelse(mediatype == "government", 1, 0))

# out$newsid2 <- ifelse(is.null(out$newsid), out$autoid, out$newsid)

out[is.na(out)] <- 0

# create base reference for comparison
# na is irrelevant to anticorruption
# oc is ordinary corruption stories about local officials/government
# Cat is corruption stories about ministerial/provincial level officials
# Tiger is corruption stories about national leaders/officials

out$corruption <- factor(out$bertac, levels = c("na", "oc", "Cat", "Tiger"), labels = c("unrelated", "ordinary", "ministerial", "national"))

# create dummy variables from bertac
out <- dummy_cols(out, select_columns = 'corruption')

out$media.factor <- factor(out$mediatype, levels = c("commercialmedia","government", "subsidizedmedia"))


# create a serial number of days
out$create_date <- as.Date(out$create_date)

out$newsday <- as.numeric(out$create_date  - as.Date("2010-06-15"))

out$geotagcmt <- as.numeric(out$geotagcmt)
  
summary(out)

```


## Altogether, in my 203,492 Sina News domestic news stories, I identify 655 stories about reportedly corrupt state-level officials, 1,426 stories about reportedly corrupt ministerial officials, and 17,876 other corruption-related stories. The remaining 183,535 stories are unrelated to corruption
```{r}
out %>% count(corruption)
```


## Appendix D - Descriptive Statistics
```{r}
library(stargazer)

df <- out[c("geotagcmt", "cmt", "campaign", "newsday", "leadernews", "governmentsource", "governmentmedia", "subsidizedmedia", "commercialmedia", "corruption_unrelated", "corruption_ordinary", "corruption_ministerial", "corruption_national")]

stargazer(df, type = "text", title = "Descriptive statistics", digits = 2, out = "./DescriptiveStats.txt",
          covariate.labels=c("Individual User Comments", "Comments", "Corruption Campaign (binary)", "Publication Date", "Leader News (binary)", "Government Agency (binary)", "Government Media (binary)", "Government-subsidized Media (Binary)", "Commercial Media (Binary)",
                             "Corruption Unrelated (Binary)", "Ordinary Corruption (Binary)", "Ministerial-level Corruption (Binary)", "National-level Corruption (Binary)") )

```

## 1.1 Basic regressions using negative binomial - Table E.1
```{r}
library(fixest)
library(pscl)
library(MASS)

# set names for all variables
setFixest_dict(c(geotagcmt = "Individual user comments", smpbjkmgzcdcmt = "all-time user comments", governmentmedia = "Government media (binary)", subsidizedmedia = "Subsidized media (binary)", campaign = "Campaign (binary)", bertac.factorTiger = "National leaders (binary)", bertac.factorCat = "Ministerial leaders (binary)", bertac.factoroc = "Ordinary corruption (binary)", cmt = "Comments", create_date = "Publication Date"))



negbin_base <- glm.nb(geotagcmt ~ corruption, data = out)

negbin_base_2 <- glm.nb(geotagcmt ~ campaign, data = out)

# the formal test of overdispersion
# odTest(negbin_base)
# odTest(negbin_base_2)

# AIC
# AIC(negbin_base)
# AIC(negbin_base_2)

# BIC
# BIC(negbin_base)
# BIC(negbin_base_2)

stargazer(negbin_base, negbin_base_2, type="text", star.cutoffs=c(0.05, 0.01, 0.001), digits = 2,
          dep.var.labels = ("individual user comments"),
          covariate.labels = c("ordinary corruption", "ministerial corruption", "national corruption"))

```


## 1.2 Same regression but using poisson distribution - Table E.3 
```{r}

pois_base <- fepois(geotagcmt ~ corruption, data = out)

pois_base_2 <- fepois(geotagcmt ~ campaign, data = out)

etable(pois_base, pois_base_2)

```



## 2.1 Baseline DID with the interaction term between the rank of corrupt officials and the campaign using negative binomial -- Table E.2

```{r}

negbin2 <- glm.nb(geotagcmt ~ campaign*corruption,
             data = out)

# the formal test of overdispersion
# odTest(negbin2)

# AIC
# AIC(negbin2)

# BIC
# BIC(negbin2)

# stargazer(negbin2, type="text")
stargazer(negbin2, type="text", star.cutoffs=c(0.05, 0.01, 0.001), digits = 2,
          dep.var.labels = ("individual user comments"),
          covariate.labels = c("campaign", "ordinary corruption", "ministerial corruption", "national corruption"))

```



## 2.2 Same regression as above but using poisson distribution -- Table E.4

```{r}

pois2 <- fepois(geotagcmt ~ campaign*corruption,
             data = out)

setFixest_etable(digits = 3, digits.stats = 4)

etable(pois2)

```


## 3.1 Main DDD test using negative binomial distribution - Table 1 and 2
```{r}

# Setting a dictionary 
setFixest_dict(c(geotagcmt = "Individual user comments", governmentmedia = "Government media (binary)", subsidizedmedia = "Government subsidized media", campaign = "Campaign (binary)", bertp = "Corruption news (binary)", bertac.factor_Tiger = "National leaders (binary)", bertac.factor_Cat = "Ministerial leaders (binary)", bertac.factor_oc = "Ordinary corruption (binary)", cmt = "Comments"))


negbin_ddd <- glm.nb(geotagcmt ~ governmentmedia*campaign*corruption, data = out)
# summary(negbin_ddd)

# the formal test of overdispersion
# odTest(negbin_ddd)

# AIC
# AIC(negbin_ddd)

# BIC
# BIC(negbin_ddd)

# Table 1
stargazer(negbin_ddd, type="text", star.cutoffs=c(0.05, 0.01, 0.001), digits = 2,
          dep.var.labels = ("individual user comments"),
          covariate.labels = c("government media","campaign", "ordinary corruption", "ministerial corruption", "national corruption"))

negbin_ddd2 <- glm.nb(geotagcmt ~ subsidizedmedia*campaign*corruption, data = out)

# summary(negbin_ddd2)

# the formal test of overdispersion
# odTest(negbin_ddd2)

# AIC
# AIC(negbin_ddd2)

# BIC
# BIC(negbin_ddd2)


# Table 2
stargazer(negbin_ddd2, type="text", star.cutoffs=c(0.05, 0.01, 0.001), digits = 2,
          dep.var.labels = ("individual user comments"),
          covariate.labels = c("subsidized media","campaign", "ordinary corruption", "ministerial corruption", "national corruption"))


```


## Generating Predicted Counts after Negative Binomial

## Plotting Figure 2

```{r}
library(ggplot2)
library(visreg)

negbin_vis <- glm.nb(geotagcmt ~ governmentmedia*campaign*corruption_national + governmentmedia*campaign*corruption_ministerial + governmentmedia*campaign*corruption_ordinary, data = out)


# png(file = "./Predicted_counts_governmentMedia_comments.png",   # The directory you want to save the file in
#     width = 4800, height = 3600,
#     res = 600)
# 
# This figure shows an intermediate result
visreg_graph <- visreg(negbin_vis, "governmentmedia", by="corruption_ordinary", scale="response", overlay=TRUE, xlab="Government media (binary)", ylab="Individual user comments") 
# 
# dev.off()

treatment_min <- min(visreg_graph$fit$governmentmedia)
treatment_max <- max(visreg_graph$fit$governmentmedia)

tibble.data <-data.frame(visreg_graph$fit)

graph.data<- tibble.data %>% filter(governmentmedia == treatment_min |
                     governmentmedia == treatment_max) %>% 
  mutate(Corruption = factor(corruption_ordinary))
p <- graph.data %>%
  ggplot(aes(x=governmentmedia, y=visregFit, group=Corruption, color=Corruption, shape=Corruption)) +
  geom_pointrange(aes(
    ymin=visregLwr,
    ymax=visregUpr)
  )

# png(file = "./Predicted_counts_governmentMedia_individualuserComments.png",   # The directory you want to save the file in
#     width = 4800, height = 3600,
#     res = 600)

# Figure 2
p + scale_x_continuous(breaks = c(0,1)) + 
  geom_point(size=3, fill="white") +
  labs(x = "Government media (binary)", y = "Predicted count of comments") 

# dev.off()

```

## Visualization for subsidized media - Plotting Figure 3

```{r}

negbin_vis2 <- glm.nb(geotagcmt ~ subsidizedmedia*campaign*corruption_national + subsidizedmedia*campaign*corruption_ministerial + subsidizedmedia*campaign*corruption_ordinary, data = out)

# png(file = "./Predicted_counts_subsidizedMedia_comments.png",   # The directory you want to save the file in
#     width = 4800, height = 3600,
#     res = 600)
# 
# this figure shows an intermediate result
visreg_graph2 <- visreg(negbin_vis2, "subsidizedmedia", by="corruption_ordinary", scale="response", overlay=TRUE, xlab="Subsidized media (binary)", ylab="Individual user comments") 
# 
# dev.off()

treatment_min <- min(visreg_graph2$fit$subsidizedmedia)
treatment_max <- max(visreg_graph2$fit$subsidizedmedia)

tibble.data <-data.frame(visreg_graph2$fit)

graph.data<- tibble.data %>% filter(subsidizedmedia == treatment_min |
                     subsidizedmedia == treatment_max) %>% 
  mutate(Corruption = factor(corruption_ordinary))

p <- graph.data %>%
  ggplot(aes(x=subsidizedmedia, y=visregFit, group=Corruption, color=Corruption, shape=Corruption)) +
  geom_pointrange(aes(
    ymin=visregLwr,
    ymax=visregUpr)
  )

# png(file = "./Predicted_counts_subsidizedMedia_individualuserComments.png",   # The directory you want to save the file in
#     width = 4800, height = 3600,
#     res = 600)

# Figure 3
p + scale_x_continuous(breaks = c(0,1)) + 
  geom_point(size=3, fill="white") +
  labs(x = "Subsidized media (binary)", y = "Predicted count of comments") 

# dev.off()


```



## 3.2 Triple difference regressions with Poisson distribution - Table E.5 and E.6

```{r}

poi_ddd <- fepois(geotagcmt ~ subsidizedmedia*campaign*corruption, data = out)

poi_ddd2 <- fepois(geotagcmt ~ governmentmedia*campaign*corruption, data = out)


setFixest_etable(digits = 3, digits.stats = 4)

etable(poi_ddd2)

etable(poi_ddd)
```




## Robustness Checks -- leader news Table H1 - H4

```{r}

rbt_base <- glm.nb(geotagcmt ~ corruption + leadernews, data = out)


rbt2 <- glm.nb(geotagcmt ~ campaign*corruption + leadernews,
             data = out)

## subsidized media
rbt3 <- glm.nb(geotagcmt ~ subsidizedmedia*campaign*corruption + leadernews, data = out)

## government media
rbt4 <- glm.nb(geotagcmt ~ governmentmedia*campaign*corruption + leadernews, data = out)

stargazer(rbt_base, rbt2, rbt3, rbt4, type="text", star.cutoffs=c(0.05, 0.01, 0.001), digits = 2)
```

## Robustnes check -- paid commentators Table H5 - H8
```{r}
out$paidcmt <- as.numeric(out$paidcmt)

out[is.na(out)] <- 0

rbt_base2 <- glm.nb(geotagcmt ~ corruption + paidcmt, data = out)

rbt5 <- glm.nb(geotagcmt ~ campaign*corruption + paidcmt, data = out)

rbt6 <- glm.nb(geotagcmt ~ subsidizedmedia*campaign*corruption + paidcmt, data = out)

rbt7 <- glm.nb(geotagcmt ~ governmentmedia*campaign*corruption + paidcmt, data = out)

stargazer(rbt_base2, rbt5, rbt6, rbt7, type="text", star.cutoffs=c(0.05, 0.01, 0.001), digits = 2)
```



## Parallel trend testing -- Table F1 - F3
```{r}

pt1 <- glm.nb(bertp ~ campaign*corruption, data = out)


pt2 <- glm.nb(bertp ~ subsidizedmedia*campaign*corruption, data = out)


pt3 <- glm.nb(bertp ~ governmentmedia*campaign*corruption, data = out)

stargazer(pt1, pt2, pt3, type="text", star.cutoffs=c(0.05, 0.01, 0.001), digits = 2,
          dep.var.labels = ("corruption-related news (binary)"))

```


## Government media over the campaign -- Response letter
# ```{r}
# 
# out$officialsource <- ifelse(out$governmentmedia == 1|out$governmentsource == 1, 1, 0)
# 
# glm1 <- glm(officialsource ~ campaign, family = "binomial", data = out)
# 
# stargazer(glm1, type="text", star.cutoffs=c(0.05, 0.01, 0.001), digits = 2)
# ```


## Load data for Synthetic Control Method

```{r}

sql_aggregate_by_day = "SELECT extract (day FROM ts ) as day, extract (month FROM ts ) as month, extract ( year FROM ts ) as year, ts, name_en, 
 coalesce(geotagcmt,0) as geotagcmt, coalesce(cmt,0) as cmt,
 coalesce(leadernews,0) as leadernews, coalesce(governmentmedia,0) as governmentmedia,
 coalesce(subsidizedmedia,0) as subsidizedmedia, coalesce(commercialmedia,0) as commercialmedia,
 coalesce(paidcmt,0) as paidcmt
FROM 
(
    SELECT  extract ( day FROM time::timestamp ) as day, extract ( month FROM time::timestamp ) as month , extract ( year FROM time::timestamp ) as year, 
	avg(geotagcmt) as geotagcmt, avg(cmt) as cmt, avg(leadernews) as leadernews, avg(governmentmedia) as governmentmedia, 
	avg(subsidizedmedia) as subsidizedmedia, avg(commercialmedia) as commercialmedia, 
	avg(paidcmt) as paidcmt, name_en
    FROM    sinanews
    where time::timestamp > '2010-06-15' and time::timestamp <'2015-06-15'
    GROUP BY  extract ( day FROM time::timestamp ), extract ( month FROM time::timestamp ) , extract ( year FROM time::timestamp ), name_en 
) AS cnt 
 right outer join ( SELECT * FROM generate_series('2010-06-15'::timestamp, '2015-06-15 00:00', '1 day') AS ts ) as datetable 
 on  extract (day FROM ts ) = cnt.day and extract (month FROM ts ) = cnt.month and extract ( year FROM ts ) = cnt.year 
 order by year, month asc"

actemp <- dbGetQuery(con, sql_aggregate_by_day)

dim(actemp) # check the dimension


```


## Preparing the dataset
```{r}

## Set a couple of printing options
options(digits = 12)
options(digits.secs = 4)

actemp$pt <- as.POSIXct(actemp$ts)

# actemp$pt <- strptime(actemp$ts, format = "%Y-%m-%d %H:%M:%S")

#convert character to timestamp
# actemp$date <- strptime(actemp$pt, "%Y-%m-%d")

actemp$date <- as.Date(actemp$pt)

# actemp$gid <- as.integer(actemp$gid)

# replace NULL with 0
actemp[is.na(actemp)] <- 0

# binary variable -- whether there is any authentic users' comments
actemp$tiger <- ifelse(actemp$name_en == "Bo Xilai" | actemp$name_en == "Zhou Yongkang" | actemp$name_en == "Xu Caihou" | actemp$name_en == "Ling Jihua" | actemp$name_en == "Su Rong", 1, 0)

actemp$campaign <- ifelse(actemp$date > "2012-11-15", 1, 0)

# actemp$treated <- as.integer(actemp$treated)

summary(actemp)
```




```{r}
library(tidyverse)

actemp <- actemp %>% dplyr::select(leadernews, geotagcmt, cmt, governmentmedia, subsidizedmedia, commercialmedia, paidcmt, tiger, campaign, name_en, date)

actemp <- complete(actemp, name_en, date)

actemp[is.na(actemp)] <- 0

summary(actemp)



```

## panel view 

```{r}
# library(panelView)
# 
# actemp <- distinct(actemp, name_en, date, .keep_all= TRUE)
# 
# panelview(geotagcmt ~ governmentmedia + commercialmedia + subsidizedmedia + leadernews + paidcmt + tiger + campaign, data = actemp,  index = c("name_en","date"), pre.post = TRUE)

```

## Generalized Synthetic Control
```{r}
library(gsynth)
gsy_subsidize <- gsynth(geotagcmt ~  subsidizedmedia + tiger + governmentmedia + commercialmedia + campaign,
              data = actemp, index = c("name_en","date"),
              se = TRUE, inference = "parametric",
              r = c(0,5), CV = FALSE, force = "two-way",
              parallel = TRUE, cores = 4, 
              min.T0 = 7, nboots = 2000, seed = 24569)
```

## Figure 4
## Plot synthesized control and counterfactual -- truncate the range of y to 20 and x to 5 days before and after news publication
```{r}
# png(file = "./result_synthetic_control_subsidizedMedia_by_day_counterfactual_truncated.png",   
# width = 4800, height = 3600,
# res = 600)
plot(gsy_subsidize, type = "counterfactual", ylim = c(0, 20), xlim = c(-5,5))

# dev.off()
```

## Estimated ATT
# ```{r}
# 
# # save image
# png(file = "./estimated_ATT_synthetic_control_subsidizedMedia_by_day.png",
# width = 4800, height = 3600,
# res = 600)
# 
# 
# plot(gsy_subsidize, type = "gap", ylim = c(-5, 5), xlim = c(-5, 5))
# 
# 
# 
# dev.off()
# ```

## Cumulative treatment effects

# ```{r}
# 
# png(file = "./missing_synthetic_control_subsidizedMedia_by_day.png",   # The directory you want to save the file in
# width = 4800, height = 3600,
# res = 600)
# 
# plot(gsy_subsidize, type = "missing", ylim = c(-5, 5), xlim = c(-5, 5))
# 
# dev.off()
# 
# print(gsy_subsidize)
# 
# cumu1 <- cumuEff(gsy_subsidize, cumu = TRUE, id = NULL, period = c(0,10))
# cumu1$est.catt
# ```


## government media

```{r}

gsy_government <- gsynth(geotagcmt ~  governmentmedia + tiger + subsidizedmedia + commercialmedia + campaign,
              data = actemp, index = c("name_en","date"),
              se = TRUE, inference = "parametric",
              r = c(0,5), CV = FALSE, force = "two-way",
              parallel = TRUE, cores = 4, 
              min.T0 = 7, nboots = 2000, seed = 24569)
```

# Figure 5
## Plot synthesized control and counterfactual -- truncate the range of y to 20 and x to 5 days before and after the news publication
```{r}
# png(file = "./result_synthetic_control_governmentMedia_by_day_counterfactual_truncated.png",   
# width = 4800, height = 3600,
# res = 600)

plot(gsy_government, type = "counterfactual", ylim = c(0, 20), xlim = c(-5,5))

# dev.off()
```

## Plot estimated ATT
```{r}

# png(file = "./estimated_ATT_synthetic_control_governmentMedia_by_day.png",
# width = 4800, height = 3600,
# res = 600)
# 
# # plot(gsy_subsidize, type = "gap", xlab = "Day", 
# #      main = "Estimated ATT", id = gsy_subsidize$name_en, 
# #      axis.adjust=TRUE)
# 
# plot(gsy_government, type = "gap", ylim = c(-5, 5), xlim = c(-5, 5))
# 
# 
# 
# dev.off()
# 

```



## Print out Estimated ATT
```{r}
# print(gsy_government)
# 
# cumu2 <- cumuEff(gsy_government, cumu = TRUE, id = NULL, period = c(0,10))
# cumu2$est.catt

```

## Robustness checks -- control visible comments
```{r}

rbt_subsidize <- gsynth(geotagcmt ~  subsidizedmedia + tiger + governmentmedia + leadernews + commercialmedia + paidcmt + campaign + cmt,
              data = actemp, index = c("name_en","date"),
              se = TRUE, inference = "parametric",
              r = c(0,5), CV = FALSE, force = "two-way",
              parallel = TRUE, cores = 4, 
              min.T0 = 7, nboots = 2000, seed = 24569)
```

## Estimated ATT - Robustness check
```{r}

# save image
# png(file = "./estimated_ATT_GSC_subsidizedMedia_by_day_robustnessCheck.png",
# width = 4800, height = 3600,
# res = 600)


plot(rbt_subsidize, type = "gap", ylim = c(-5, 5), xlim = c(-5, 5))



# dev.off()
```
## Robustness checks -- control visible comments, government media
```{r}

rbt_government <- gsynth(geotagcmt ~  governmentmedia + tiger + subsidizedmedia + leadernews + commercialmedia + paidcmt + campaign + cmt,
              data = actemp, index = c("name_en","date"),
              se = TRUE, inference = "parametric",
              r = c(0,5), CV = FALSE, force = "two-way",
              parallel = TRUE, cores = 4, 
              min.T0 = 7, nboots = 2000, seed = 24569)

```

```{r}

# save image
# png(file = "./estimated_ATT_GSC_governmentMedia_by_day_robustnessCheck.png",
# width = 4800, height = 3600,
# res = 600)


plot(rbt_government, type = "gap", ylim = c(-5, 5), xlim = c(-5, 5))



# dev.off()
```

## Disconnect the database connection manually
```{r}
dbDisconnect(con)
```



```



